In this first exercise we will perform a few basic operations with VESPUCCI. We will create a Module starting from a few genes and then automatically extend it by adding more genes. We will also have a look at gene and sample annotations. VESPUCCI is the gene expression database for grapevine, and we can access it via its GraphQL interface, called COMPASS. pyCOMPASS is a Python package that wraps some functionality to simplify communication with the COMPASS interface.
%%html
<script src="https://cdn.plot.ly/plotly-latest.min.js"></script>
from pycompass import Connect, Compendium, Module, Platform, Experiment, BiologicalFeature, SampleSet, Sample, Plot, Annotation
from IPython.core.display import display, HTML
# Endpoint of the COMPASS GraphQL service used throughout this notebook.
url = 'http://methada2020.uv.es:5555/graphql'
conn = Connect(url)

# Scan the compendia offered by the server and keep the first one whose
# compendium_name is 'vitis_vinifera'.
vv_compendium = None
for compendium in conn.get_compendia():
    if compendium.compendium_name == 'vitis_vinifera':
        vv_compendium = compendium
        break

# Fail here with a clear message instead of letting a None compendium
# surface later as an AttributeError in an unrelated cell.
if vv_compendium is None:
    raise LookupError("compendium 'vitis_vinifera' not found at " + url)
Let's print out the description for this compendium
# Show the `description` attribute of the compendium selected above.
print(vv_compendium.description)
Let's build our module starting from a bunch of top-ranking genes from your previous exercise (contrast different cultivars)
# Seed genes for the first module: 29 'VIT_...' gene names, one per line.
gene_names = [
    'VIT_05s0094g00350',
    'VIT_07s0031g02630',
    'VIT_19s0015g02480',
    'VIT_08s0007g08840',
    'VIT_01s0026g00520',
    'VIT_03s0017g02170',
    'VIT_19s0014g05330',
    'VIT_02s0154g00130',
    'VIT_02s0025g04330',
    'VIT_13s0067g00490',
    'VIT_09s0002g01200',
    'VIT_14s0030g00140',
    'VIT_03s0063g00120',
    'VIT_05s0029g01480',
    'VIT_11s0052g01650',
    'VIT_02s0087g01020',
    'VIT_09s0070g00160',
    'VIT_13s0019g02180',
    'VIT_07s0095g00550',
    'VIT_04s0008g06570',
    'VIT_04s0069g00860',
    'VIT_04s0210g00060',
    'VIT_07s0104g00430',
    'VIT_15s0107g00210',
    'VIT_16s0039g00970',
    'VIT_10s0003g01730',
    'VIT_17s0000g07060',
    'VIT_16s0100g00510',
    'VIT_02s0154g00590',
]
# Query the compendium for the biological features listed in gene_names
# (the 'name_In' filter presumably matches any of the given names — confirm
# against the COMPASS schema).
genes = BiologicalFeature.using(vv_compendium).get(filter={'name_In': gene_names})
# The two commented-out lines show how module_1 can be created from `genes`
# with 'legacy' normalization and saved to 'module_1.vsp'. Here the saved file
# is loaded instead, so within this view `genes` is only used by the
# commented-out create call.
#module_1 = Module.using(vv_compendium).create(biofeatures=genes, normalization='legacy')
#module_1.write_to_file('module_1.vsp')
module_1 = Module.read_from_file('module_1.vsp', conn)
# Bare expression: in a notebook cell this displays module_1.values.
module_1.values
# Heatmap of module_1; the plot result is wrapped in HTML(...) for inline display.
html_hm_module_1 = Plot(module_1).plot_heatmap()
display(HTML(html_hm_module_1))
# Bare expression: in a notebook cell this displays the score/rank methods the
# compendium reports for 'legacy' normalization.
vv_compendium.get_score_rank_methods(normalization='legacy')
# Distribution plot of module_1 for the plot type
# 'biological_features_standard_deviation_distribution', shown inline.
dist_module_1 = Plot(module_1).plot_distribution(plot_type='biological_features_standard_deviation_distribution')
display(HTML(dist_module_1))
# module_2 is loaded from the same saved file as module_1 (presumably to get an
# independent object that can be extended without touching module_1).
module_2 = Module.read_from_file('module_1.vsp', conn)
# Bare expression: in a notebook cell this displays module_2.values.
module_2.values
# Ask the compendium to rank biological features for module_1 with the "std" method.
rank_genes = vv_compendium.rank_biological_features(module_1, rank_method="std")
# Keep the first 10 entries of the ranking's 'id' field.
# NOTE(review): despite the variable name these are ids, not names — they are
# passed to the 'id_In' filter on the next line.
new_gene_names = rank_genes['ranking']['id'][:10]
new_genes = BiologicalFeature.using(vv_compendium).get(filter={'id_In': new_gene_names})
# Extend module_2 with the fetched features and show its heatmap inline.
module_2.add_biological_features(new_genes)
html_hm_module_2 = Plot(module_2).plot_heatmap()
display(HTML(html_hm_module_2))
# module_3 is the difference between module_2 and module_1.
# NOTE(review): sample_sets=False presumably restricts the difference to the
# biological features (leaving only the ones added to module_2) and keeps the
# sample sets — confirm against the pyCOMPASS documentation.
module_3 = Module.difference(module_2, module_1, sample_sets=False)
# Heatmap of the resulting module, shown inline.
html_hm_module_3 = Plot(module_3).plot_heatmap()
display(HTML(html_hm_module_3))
# Bare expression: in a notebook cell this displays the name of the element at
# index 6 of new_genes.
new_genes[6].name
# Fetch a single biological feature by exact name and plot its annotation network.
# NOTE(review): the gene name is hard-coded; presumably it is the name shown by
# the expression above — confirm, since the ranking result may change.
html_annotation_gene = Annotation(BiologicalFeature.using(vv_compendium).get(filter={'name': 'VIT_05s0049g00760'})[0]).plot_network()
display(HTML(html_annotation_gene))
# Show the annotation network of every item in module_3's first sample set
# (each item is presumably a Sample — confirm against pyCOMPASS).
for s in module_3.sample_sets[0]:
    display(HTML(Annotation(s).plot_network()))
# Print the annotation triples of every item in module_3's first sample set,
# one triple per line with its parts joined by ' -- '.
for s in module_3.sample_sets[0]:
    for t in Annotation(s).get_triples():
        print(' -- '.join(t))
    # Blank line after each item's triples.
    # NOTE(review): the original indentation was lost; running print() once per
    # outer iteration (rather than once after the whole loop) is assumed.
    print()